Single-cell analysis of human dataset.
library(dplyr)
library(Seurat)
library(ggplot2)
library(patchwork)
random_seed <- 12345
data <- readRDS('../data/raw/MARS-Fung.RDS')
raw_ann <- CreateSeuratObject(data$counts, meta.data = data$metadata)
raw_ann[['percent.mito']] <- PercentageFeatureSet(raw_ann, pattern = "^MT-")
raw_ann[['percent.ercc']] <- PercentageFeatureSet(raw_ann, pattern = "^ERCC-")
raw_ann[['percent.ribo']] <- PercentageFeatureSet(raw_ann, pattern = "^RP[LS]")
VlnPlot(raw_ann,
features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"),
ncol = 4)
FeatureScatter(raw_ann, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "Stage")
print(paste0("Before filtering: ", dim(raw_ann)[2], " cells ", dim(raw_ann)[1], " genes"))
## [1] "Before filtering: 1920 cells 57874 genes"
# Remove ERCC
raw_ann <- raw_ann[rownames(raw_ann)[!grepl('ERCC-', rownames(raw_ann))], ]
# Remove Zero stage
raw_ann <- raw_ann[, raw_ann@meta.data$Stage != 'Zero']
nbins <- 100
min_cells <- 2000
max_cells <- 35e3
min_genes <- 550
max_genes <- 4950
ggplot(raw_ann@meta.data, aes(x=nCount_RNA)) +
geom_histogram(bins = nbins) +
geom_vline(aes(xintercept=min_cells), linetype="dashed", color='red') +
geom_vline(aes(xintercept=max_cells), linetype="dashed", color='red')
ggplot(raw_ann@meta.data, aes(x=nFeature_RNA)) +
geom_histogram(bins = nbins) +
geom_vline(aes(xintercept=min_genes), linetype="dashed", color='red') +
geom_vline(aes(xintercept=max_genes), linetype="dashed", color='red')
ggplot(raw_ann@meta.data, aes(x=nCount_RNA, y=nFeature_RNA, color=percent.mito)) +
geom_point() +
scale_color_continuous(type = "viridis") +
geom_vline(aes(xintercept=min_cells), linetype="dashed", color='red') +
geom_vline(aes(xintercept=max_cells), linetype="dashed", color='red') +
geom_hline(aes(yintercept=min_genes), linetype="dashed", color='red') +
geom_hline(aes(yintercept=max_genes), linetype="dashed", color='red')
adata <- subset(raw_ann, subset =
nFeature_RNA > min_genes & nFeature_RNA < max_genes &
nCount_RNA > min_cells & nCount_RNA < max_cells &
percent.mito < 20)
adata <- CreateSeuratObject(adata@assays$RNA@counts, min.cells = 3, meta.data = adata@meta.data)
print(paste0("After filtering: ", dim(adata)[2], " cells ", dim(adata)[1], " genes"))
## [1] "After filtering: 1702 cells 23050 genes"
VlnPlot(adata,
features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"),
ncol = 4)
saveRDS(adata, file = "../data/processed/01_fung.filtered.RDS")
adata <- adata[, adata$Condition %in% c("VFG53_1025", "VFG53_-BMP_1101", "VFG53_-BMP+FGF_1109")]
dim(adata)
## [1] 23050 562
adata <- NormalizeData(adata)
adata <- FindVariableFeatures(adata, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(adata), 10)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(adata)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2
adata <- ScaleData(adata)
adata <- CellCycleScoring(adata,
s.features = cc.genes.updated.2019$s.genes,
g2m.features = cc.genes.updated.2019$g2m.genes,
set.ident = TRUE)
adata <- RunPCA(adata, npcs = 50)
ElbowPlot(adata)
DimPlot(adata, reduction = 'pca', group.by = 'Stage')
DimPlot(adata, reduction = 'pca', group.by = 'Phase')
Idents(adata) <- 'Stage'
commonR::plot_proportion_bar(adata, group.by = "Phase") + ggtitle("Cell cycle proportion per Stage")
adata <- FindNeighbors(adata, dims = 1:20)
adata <- FindClusters(adata, random.seed = random_seed, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 562
## Number of edges: 29045
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6803
## Number of communities: 2
## Elapsed time: 0 seconds
adata <- RunUMAP(adata, dims = 1:20, seed.use = random_seed)
DimPlot(adata, reduction = "umap")
DimPlot(adata, reduction = "umap", group.by = 'Stage')
DimPlot(adata, reduction = "umap", group.by = 'Phase')
markers <- FindAllMarkers(adata, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.5)
markers %>%
group_by(cluster) %>%
slice_max(n = 10, order_by = avg_log2FC)
adata <- FindNeighbors(adata, dims = 1:20)
adata <- FindClusters(adata, random.seed = random_seed, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 562
## Number of edges: 29045
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6357
## Number of communities: 3
## Elapsed time: 0 seconds
adata <- RunUMAP(adata, dims = 1:20, seed.use = random_seed)
DimPlot(adata, reduction = "umap")
DimPlot(adata, reduction = "umap", group.by = 'Stage')
DimPlot(adata, reduction = "umap", group.by = 'Phase')
markers <- FindAllMarkers(adata, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.5)
markers %>%
group_by(cluster) %>%
slice_max(n = 10, order_by = avg_log2FC)
saveRDS(adata, file = '../data/processed/01_fung.RDS')
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.1.1 ggplot2_3.3.5 SeuratObject_4.0.4 Seurat_4.1.0
## [5] dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_2.0-2 ggsignif_0.6.3
## [4] deldir_1.0-6 ellipsis_0.3.2 ggridges_0.5.3
## [7] rstudioapi_0.13 spatstat.data_2.1-2 leiden_0.3.9
## [10] listenv_0.8.0 farver_2.1.0 ggrepel_0.9.1
## [13] RSpectra_0.16-0 fansi_1.0.2 codetools_0.2-18
## [16] splines_4.1.2 knitr_1.37 polyclip_1.10-0
## [19] jsonlite_1.7.2 ica_1.0-2 cluster_2.1.2
## [22] png_0.1-7 uwot_0.1.11 shiny_1.7.1
## [25] sctransform_0.3.3 spatstat.sparse_2.1-0 compiler_4.1.2
## [28] httr_1.4.2 Matrix_1.4-0 fastmap_1.1.0
## [31] lazyeval_0.2.2 cli_3.1.0 limma_3.50.0
## [34] later_1.3.0 htmltools_0.5.2 tools_4.1.2
## [37] igraph_1.2.11 gtable_0.3.0 glue_1.6.0
## [40] RANN_2.6.1 reshape2_1.4.4 tinytex_0.36
## [43] Rcpp_1.0.8 scattermore_0.7 jquerylib_0.1.4
## [46] vctrs_0.3.8 nlme_3.1-155 lmtest_0.9-39
## [49] xfun_0.29 stringr_1.4.0 globals_0.14.0
## [52] mime_0.12 miniUI_0.1.1.1 lifecycle_1.0.1
## [55] irlba_2.3.5 commonR_0.1.0 goftest_1.2-3
## [58] future_1.23.0 MASS_7.3-55 zoo_1.8-9
## [61] scales_1.1.1 spatstat.core_2.3-2 promises_1.2.0.1
## [64] spatstat.utils_2.3-0 parallel_4.1.2 RColorBrewer_1.1-2
## [67] yaml_2.2.1 reticulate_1.23 pbapply_1.5-0
## [70] gridExtra_2.3 sass_0.4.0 rpart_4.1-15
## [73] stringi_1.7.6 highr_0.9 rlang_0.4.12
## [76] pkgconfig_2.0.3 matrixStats_0.61.0 evaluate_0.14
## [79] lattice_0.20-45 ROCR_1.0-11 purrr_0.3.4
## [82] tensor_1.5 htmlwidgets_1.5.4 labeling_0.4.2
## [85] cowplot_1.1.1 tidyselect_1.1.1 parallelly_1.30.0
## [88] RcppAnnoy_0.0.19 plyr_1.8.6 magrittr_2.0.1
## [91] R6_2.5.1 generics_0.1.1 DBI_1.1.2
## [94] pillar_1.6.4 withr_2.4.3 mgcv_1.8-38
## [97] fitdistrplus_1.1-6 survival_3.2-13 abind_1.4-5
## [100] tibble_3.1.6 future.apply_1.8.1 crayon_1.4.2
## [103] KernSmooth_2.23-20 utf8_1.2.2 spatstat.geom_2.3-1
## [106] plotly_4.10.0 rmarkdown_2.11 grid_4.1.2
## [109] data.table_1.14.2 digest_0.6.29 xtable_1.8-4
## [112] tidyr_1.1.4 httpuv_1.6.5 munsell_0.5.0
## [115] viridisLite_0.4.0 bslib_0.3.1